# Figure_A02.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file produces Figure A2 in the appendix to the article: "Average
# Strictness of Compulsory Schooling Laws and Average Years of Schooling."



library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))   # for split_fac(), sumNA()
library(car)       # for Recode()
library(dplyr)     # for %>%, bind_rows, left_join
library(lattice)
library(grid)
library(tibble)    # for rownames_to_column

source('CSL_coding.R')    
source('functions/mergeStateControlVars.R')  
source("IV_setup.R")             



##############################################################################
# PRELIMINARIES FOR PRINTING THE FIGURE 
##############################################################################
# Output location and PDF metadata. The PDF is written to
# <dirOutput><filenameStem>.pdf.
dirOutput    <- "float_output/"
filenameStem <- "Figure_A02"
PDF_title    <- "Figure A2: Over-time variation in compulsory attendance laws"

# Page size and background handed to pdf().
PS_width             <- 9.5
PS_height            <- 12
postscriptBackground <- "transparent"

# Panel geometry.
panelWidth  <- list(1.5, "inches")
panelHeight <- list(3, "inches")
panelLayout <- c(3, 1)    # columns, then rows
xBetween    <- 3          # space between columns
yBetween    <- 1.3175     # space between rows

# Colors and line widths.
plotLineColor    <- "black"
plotLineWidth    <- 1
panelBorderCol   <- "black"
panelNumberCol   <- grey(0.2)  # color of each panel's number; lower is darker
panelBorderWidth <- 0.3

# Text sizes (all are cex multipliers) and tick length.
baseCexSize   <- 1
xLabSize      <- baseCexSize * 0.85
yLabSize      <- baseCexSize
stripTextSize <- 0.75
xAxisTextSize <- 0.8 * 0.87
yAxisTextSize <- 0.8 * 0.87
axisTickSize  <- 0.4

# X AXES. Every list-valued component holds one entry per panel, indexed by
# panel number. The first two panels share labels and limits; the third
# covers a shorter span of years.
xAxisLabels <- c(
  rep(list(qw('1920 1940 1960 1980 2000')), 2),
  list(qw('1970 1980 1990 2000')))
xAxis <- list(
  draw        = TRUE,
  labels      = xAxisLabels,
  at          = lapply(xAxisLabels, as.integer),
  limits      = c(rep(list(c(1910, 2010)), 2), list(c(1965, 2005))),
  tck         = c(axisTickSize, 0),
  col         = panelBorderCol,
  cex         = xAxisTextSize,
  alternating = 1,
  relation    = 'free',
  axs         = 'i')  # axs='i': no padding around the x limits

# Y AXES. The last two panels must have the same limits and labels, so both
# are generated from one definition.
yAxisLimits <- c(
  list(c(0, .85)),
  rep(list(c(5.5, 14.5)), 2))
yAxisLabels <- c(
  list(qw('.1 .2 .3 .4 .5 .6 .7 .8')),
  # Whole numbers lying strictly inside each pair of limits.
  lapply(
    yAxisLimits[2:3],
    function(lim) seq(ceiling(lim[1] + .001), floor(lim[2] - .001))))
yAxis <- list(
  draw        = TRUE,
  labels      = yAxisLabels,
  at          = lapply(yAxisLabels, as.numeric),
  limits      = yAxisLimits,
  tck         = c(axisTickSize, 0),
  col         = panelBorderCol,
  cex         = yAxisTextSize,
  alternating = 1,
  relation    = 'free',  # but the ylim will constrain all panels to same height
  rot         = 90,
  axs         = 'i')

                 

##############################################################################
# MERGE CSL DATA WITH POPULATION DATA
##############################################################################
# In any given year, different states had different compulsory attendance
# laws. The 'weighted' series of the figure (drawn in the second panel,
# 'meanYearsRequired') weights the law of each state by that state's
# population, so population must be merged into the CSL data first.
#
# NOTE(review): CSLdata is not defined in this file; presumably it is created
# by one of the scripts sourced above (e.g., 'CSL_coding.R') -- confirm.

# Drop AK, DC, and HI, and keep only the state, year, and CA columns.
CSLdata       <- subset(CSLdata, !state%in%c('AK', 'DC', 'HI'), c(state, year, CA))
# Remove the factor levels of the states that were just dropped.
CSLdata$state <- droplevels(CSLdata$state)
# Recode missing CA values to 0.
# NOTE(review): presumably a missing value means that no attendance was
# required in that state-year -- confirm against the CSL coding.
CSLdata$CA    <- Recode(CSLdata$CA, 'NA = 0')
# Look up state-year variables. Only the pop, stateYoung, and yearYoung
# columns of the result are used below.
stateYearVars <- mergeStateControlVars(factor(CSLdata$state), CSLdata$year, colElim = FALSE)
# Attach population to each state-year row, matching state to stateYoung and
# year to yearYoung. left_join keeps every row of CSLdata.
CSLdata       <- left_join(
  x  = CSLdata, 
  y  = stateYearVars[, qw('pop stateYoung yearYoung')], 
  by = c('state'='stateYoung', 'year'='yearYoung'))



##############################################################################
# CREATE THE DATA FRAME
##############################################################################
# CA_DF holds, for every year, the average compulsory attendance (CA) across
# all states in CSLdata, computed two ways:
#   unweighted -- each state counts equally;
#   weighted   -- each state is weighted by its population in that year.
#
# expand.grid() varies `year` fastest, so the rows are every year for
# 'unweighted' followed by every year for 'weighted'. The outcome vector is
# assembled below in that same order.
CA_DF <- expand.grid(
  year     = min(CSLdata$year):max(CSLdata$year),
  outcome  = NaN,
  groupVar = qw('unweighted weighted'),
  panel    = 'meanYearsRequired',
  stringsAsFactors = FALSE)
CA_DF$outcome <- local({
  statesByYear <- split(CSLdata, CSLdata$year)  # one data frame per year, in year order
  unweighted   <- vapply(
    statesByYear,
    function(d) mean(d$CA),
    numeric(1))
  weighted     <- vapply(
    statesByYear,
    function(d) weighted.mean(d$CA, d$pop),
    numeric(1))
  unname(c(unweighted, weighted))
})


# makeANES_orGSS_DF()
#
# Compute the by-year mean of one survey variable and return it in the long
# format that is used to build the figure's data frame.
#
# Arguments:
#   df           data frame that has a `yearInt` column and a column whose
#                name is given by `outcomeName`.
#   dfSource     label to store in the `groupVar` column (e.g., 'ANES').
#   outcomeName  name (a single string) of the column of `df` to average.
#
# Returns a data frame with the columns `year` (integer), `outcome` (the
# result of meanNA() within each year), and `groupVar`. Rows with a missing
# value in any column are dropped.
#
# Stops with an error if `df` lacks either required column.
makeANES_orGSS_DF <- function(df, dfSource, outcomeName) {
  # Look the columns up in `df` itself. Resolving them with get() or bare
  # names inside with() would fall back to the enclosing and global
  # environments, so a missing or misspelled column could silently pick up an
  # unrelated object of the same name.
  missingCols <- setdiff(c(outcomeName, 'yearInt'), names(df))
  if (length(missingCols) > 0) {
    stop(
      'makeANES_orGSS_DF: column(s) not found in df: ',
      paste(missingCols, collapse = ', '),
      call. = FALSE)
  }

  # The piped value is referred to as `.`, and as.data.frame() names the
  # resulting column '.', which is why that column is renamed below.
  tapply(df[[outcomeName]], df[['yearInt']], meanNA) %>% 
    as.data.frame %>%
    rownames_to_column("year") %>%
    mutate(year = as.integer(year)) %>%
    add_column(groupVar = dfSource) %>%
    rename(outcome = ".") %>%
    .[complete.cases(.), ]
}

# Education, without censoring: by-year mean of educYearsUncensored in each
# survey. These rows feed the 'meanYears' panel.
# NOTE(review): ANES.df and GSS.df are not defined in this file; presumably
# they come from one of the scripts sourced above -- confirm.
meanYearsOfEdUncensored_DF <- bind_rows(
  makeANES_orGSS_DF(ANES.df, 'ANES', 'educYearsUncensored'),
  makeANES_orGSS_DF(GSS.df,  'GSS',  'educYearsUncensored')) %>%
  add_column(panel = 'meanYears') 

# High school completion, self-reported, and including some GEDs. The bump in
# the ANES completion rate between 2000 and 2002 is due to sampling strategy.
# NOTE(review): this data frame is not included in combined_DF below and is
# not referenced again in this file.
meanHS_completion_DF <- bind_rows(
  makeANES_orGSS_DF(ANES.df, 'ANES', 'HSgrad'),
  makeANES_orGSS_DF(GSS.df,  'GSS',  'HSgrad')) %>%
  add_column(panel = 'meanCompletion')

# Share of CSLdata rows, by year, with moderate laws (8 <= CA <= 10) and with
# strict laws (CA > 10). Taking the mean of a logical vector gives the
# proportion of TRUE values. as.data.frame() under the pipe yields a column
# named '.', which is renamed to `outcome` below.
tmp_mod    <- tapply(CSLdata$CA>=8 & CSLdata$CA<=10, CSLdata$year, mean) %>%
  as.data.frame %>%
  rownames_to_column('year')
tmp_strict <- tapply(CSLdata$CA>10,                  CSLdata$year, mean) %>%
  as.data.frame %>%
  rownames_to_column('year')
# Stack the two series. tmp_mod and tmp_strict are both computed over
# CSLdata$year, so each supplies half of the rows; the first half is labeled
# 'moderate' and the second half 'strict'.
CA_strictness_DF <- bind_rows(tmp_mod, tmp_strict) %>%
  rename(outcome = '.') %>%
  mutate(year = as.integer(year)) %>%
  add_column(
    groupVar = rep(qw('moderate strict'), each = nrow(.)/2),
    panel = 'strictness')

# DATA FRAME FOR THREE PANELS
# The order of the factor levels fixes the order of the panels: strictness,
# then meanYearsRequired, then meanYears.
combined_DF <- bind_rows(CA_strictness_DF, CA_DF, meanYearsOfEdUncensored_DF) %>%
  mutate(panel = factor(panel, levels = qw('strictness meanYearsRequired meanYears')))  



##############################################################################
# PANEL FUNCTION
##############################################################################
# sparklinePanel()
#
# Panel function for the figure. For the panel that is being drawn, it
#   1. plots the data by passing all of its arguments to panel.xyplot();
#   2. writes a text label next to each line, at hand-chosen native
#      coordinates;
#   3. adds inward-pointing ticks on the top and right edges, at the same
#      positions as the ticks in xAxis$at and yAxis$at.
#
# Reads xAxis, yAxis, axisTickSize, yLabSize, and plotLineColor from the
# calling environment of the script.
sparklinePanel <- function(...) {
  panel.xyplot(...)

  whichPanel <- panel.number()

  # LABEL THE LINES
  # One entry per panel, in panel order. Each entry gives the label text and
  # the position of the top-left corner of each label.
  allLineLabels <- list(
    list(
      label = c('moderate laws', 'strict laws'),
      x     = unit(c(1951, 1950.5), 'native'),
      y     = unit(c(.80, .2), 'native')),
    list(
      label = c('unweighted', paste0('population\U00AD', 'weighted')),
      x     = unit(c(1921.75, 1928.75), 'native'),
      y     = unit(c(7.25, 11.325), 'native')),
    list(
      label = c('ANES', 'GSS'),
      x     = unit(c(1970, 1992), 'native'),
      y     = unit(c(10.9, 13.75), 'native')))
  lineLabels <- allLineLabels[[whichPanel]]
  grid.text(
    label = lineLabels$label,
    x     = lineLabels$x,
    y     = lineLabels$y,
    just  = c('left', 'top'),
    gp    = gpar(cex = .75 * yLabSize, col = plotLineColor))

  # ADD INNER TICKS
  # Unlabeled ticks drawn inside the panel on the given side.
  drawInnerTicks <- function(side, at) {
    panel.axis(
      side    = side,
      at      = at,
      labels  = FALSE,
      outside = FALSE,
      half    = FALSE,
      tck     = 1 * axisTickSize)
  }
  drawInnerTicks('top', xAxis$at[[whichPanel]])
  drawInnerTicks('right', yAxis$at[[whichPanel]])
}



##############################################################################
# PRINT THE PDF FILE
##############################################################################
# Open the PDF device, set the lattice graphical parameters, then build and
# print the three-panel plot. The device stays open after this block so that
# further drawing can be added to the page before dev.off() is called.
#
# When only one line is plotted per panel, plot.line governs the appearance of 
# the line.  When more than one line is plotted per panel, superpose.line 
# governs the appearance of the lines.
#
# NOTE(review): lattice settings are kept per device, so trellis.par.set()
# presumably has to come after pdf() -- confirm before reordering.
pdf(
  file   = paste0(dirOutput, filenameStem, '.pdf'), 
  width  = PS_width, 
  height = PS_height, 
  paper  = "special", 
  title  = PDF_title,
  bg     = postscriptBackground)
trellis.par.set(
  axis.components = list(     
    left   = list(
      pad1 = 0.50,    # distance between axis ticks and tick labels
      pad2 = 1.00)),  # distance between tick labels and y-axis label  
  axis.line = list(
    alpha = 1, 
    col   = panelBorderCol, # to eliminate panel border, set col to "white"
    lty   = 1, 
    lwd   = panelBorderWidth),  
  plot.line = list(alpha = 1, col = plotLineColor, lty = 1, lwd = 1),
  # Two lines per panel: the first is solid and the second uses lty "43".
  # NOTE(review): "43" is presumably a custom dash pattern (4 units on, 3
  # units off) -- confirm against the documentation of par(lty).
  superpose.line = list(
    alpha = c(1,1), 
    col   = c('black', plotLineColor), 
    lty   = c("solid", "43"), 
    lwd   = c(1, plotLineWidth)))



# One panel per level of `panel`, one line per level of `groupVar`. Strips
# are suppressed and no axis titles are drawn here. Only the horizontal
# spacing between panels is set (y = NULL), so yBetween is not used by this
# call.
CSL_sparklines_national <- xyplot(
  x        = outcome ~ year | panel,
  groups   = groupVar,
  data     = combined_DF, 
  type     = 'l',
  layout   = panelLayout,
  between  = list(x = xBetween, y = NULL),
  panel    = sparklinePanel,
  strip    = FALSE,
  scales   = list(x=xAxis, y=yAxis),
  xlab     = '',
  ylab     = NULL,
  par.settings = list(clip = list(panel = "on"))
)
# Draw the plot on the open device with a fixed size for every panel.
print(CSL_sparklines_national, panel.width=panelWidth, panel.height=panelHeight)


# VERTICAL PANEL LABELS
# One rotated title per panel, drawn to the left of the panel after the plot
# has been printed. Clipping issues prevent these labels from being added
# inside the panel function, so each panel is revisited with clipping turned
# off. The device is closed once all titles have been drawn.
yAxisText <- c(
  'proportion of states',
  'average years of schooling required',
  'average years of schooling')
for (panelNum in seq_along(yAxisText)) {
  trellis.focus(
    name      = "panel",
    column    = panelNum,
    row       = 1,
    clip.off  = TRUE,
    highlight = FALSE)
  grid.text(
    label = yAxisText[panelNum],
    x     = -.225,   # left of the panel's left edge
    y     = .5,
    just  = 'center',
    rot   = 90,
    gp    = gpar(font = 1, cex = yLabSize))
  trellis.unfocus()
}

dev.off()


# CROP THE PDF
# If pdfcrop appears to be available (the pdfcrop and perl executables are
# both on the path, or a package named 'pdfcrop' is installed), write a
# cropped copy of the figure to <filenameStem>_crop.pdf.
#
# The uncropped PDF is deleted only after pdfcrop has exited with status 0
# and the cropped file exists. Otherwise the uncropped PDF is kept and a
# warning is issued, so a failed crop never leaves the script without output.
pdfcropAvailable <-
  (nzchar(Sys.which('pdfcrop')) && nzchar(Sys.which('perl'))) ||
  'pdfcrop' %in% installed.packages()[, 'Package']

if (pdfcropAvailable) {
  uncroppedFile <- paste0(dirOutput, filenameStem, '.pdf')
  croppedFile   <- paste0(dirOutput, filenameStem, '_crop.pdf')
  onWindows     <- Sys.info()[['sysname']] == 'Windows'

  # On Windows, the command is run through the command interpreter.
  # system() returns the exit status of the command (0 for success).
  cropStatus <- system(paste(
    if (onWindows) paste(Sys.getenv('Comspec'), '/c ') else '',
    'pdfcrop',
    uncroppedFile,
    croppedFile))

  if (identical(as.integer(cropStatus), 0L) && file.exists(croppedFile)) {
    invisible(file.remove(uncroppedFile))
    # Normalize the full path of the file, rather than pasting the file name
    # onto the normalized directory, so that the result does not depend on
    # whether the normalized directory keeps its trailing separator.
    if (interactive() && onWindows) shell.exec(normalizePath(croppedFile))
  } else {
    warning(
      'pdfcrop did not produce ', croppedFile, '; keeping ', uncroppedFile,
      call. = FALSE)
  }
}